Description:

This is a simple SIR model, built using COVID-19 data extracted from Wikipedia (link to source)

# Loading required libraries (silently)

suppressPackageStartupMessages({
  library(ggplot2)
  library(magrittr)
  library(rvest)
  library(xml2)
  library(dplyr)
  library(deSolve)
  library(plotly)
})

Loading the dataset

dat = read.csv("wiki_data.csv")
dat
# retrieving data related to infections, recoveries and deaths

infected = dat %>% select(!contains(c('_R', '_D')))
recovered = dat %>% select(contains(c('date', '_R')))
dead = dat %>% select(contains(c('date', '_D')))

Data Analysis

# cumulative infections over time
inf_plot_dat = infected
districts = colnames(inf_plot_dat)[-1]

inf_plot_dat = inf_plot_dat %>% mutate_at(districts, cumsum)

inf_plot = plot_ly()
for(col in districts){
  inf_plot = inf_plot %>% add_trace(x = inf_plot_dat[['date']], y = inf_plot_dat[[col]], name = col, type = 'scatter', mode = 'lines')
}
inf_plot = inf_plot %>% layout(title = 'Cumulative infections over time (days)', xaxis = list(title = 'Days'), yaxis = list(title = 'Number of people'))
inf_plot
# cumulative recoveries over time
rec_plot_dat = recovered
districts = colnames(rec_plot_dat)[-1]

rec_plot_dat = rec_plot_dat %>% mutate_at(districts, cumsum)

rec_plot = plot_ly()
for(col in districts){
  rec_plot = rec_plot %>% add_trace(x = rec_plot_dat[['date']], y = rec_plot_dat[[col]], name = col, type = 'scatter', mode = 'lines')
}
rec_plot = rec_plot %>% layout(title = 'Cumulative recoveries over time (days)', xaxis = list(title = 'Days'), yaxis = list(title = 'Number of people'))
rec_plot
# cumulative deaths over time
dead_plot_dat = dead
districts = colnames(dead_plot_dat)[-1]

dead_plot_dat = dead_plot_dat %>% mutate_at(districts, cumsum)

dead_plot = plot_ly()
for(col in districts){
  dead_plot = dead_plot %>% add_trace(x = dead_plot_dat[['date']], y = dead_plot_dat[[col]], name = col, type = 'scatter', mode = 'lines')
}
dead_plot = dead_plot %>% layout(title = 'Cumulative deaths over time (days)', xaxis = list(title = 'Days'), yaxis = list(title = 'Number of people'))
dead_plot

Observations from the three graphs:

  • The district of Hubei is the most affected district among the others, with over 37k confirmed cases and 1000+ deaths, with the city of Wuhan contributing to most cases.
  • The number of cases, recoveries and deaths in the other districts is significantly lower than observed in Hubei.

SIR Model

#getting data for fitting SIR model

sir_data = data.frame(time = 1:35)
sir_data$I = rowSums(infected[, -c(1, 33:34)])
sir_data$R = rowSums(recovered[, -1])

sir_data
#plotting the infected population

ggplot(data = sir_data) +
    geom_point(aes(x = time, y = I)) +
    labs(x = "Time (days)", y = "Number of infected people", title = 'Number of infected people over time') +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

# defining SIR model
model_SIR <-function(time, state, parameters){
  with(as.list(c(state, parameters)), {
    
    N = S+I+R
    lambda = beta * (I/N)
    
    dS = -lambda*S
    dI = lambda*S - gamma*I
    dR = gamma*I
    
    return(list(c(dS, dI, dR)))
  })
}
# defining sum-of-squares error function
SIR_SSE <- function(params, data){
  result = as.data.frame(ode(y = initial_state_values,
                             times = times,
                             func = model_SIR,
                             parms = params))
  
  delta = (result$I[result$time %in% data$time] - data$I)^2
  SSE = sum(delta)
  
  return(SSE)
}
# finding optimum beta and gamma values for the test population

initial_state_values = c(S = 1000000-1, I = 1, R = 0)

times = seq(from = 0, to = 60
            , by = 0.1)

optimised = optim(par = c(beta = 1, gamma = 0.5),
                  fn = SIR_SSE,
                  dat = sir_data)

optimised$par
    beta    gamma 
4.191642 3.822256 
#plotting real vs. SIR model data
opt_dat = as.data.frame(ode(y = initial_state_values,
                             times = times,
                             func = model_SIR,
                             parms = optimised$par))

plott = ggplot() 
plott = plott + geom_point(aes(x = time, y = I),colour = 'red', data = sir_data)
plott = plott + geom_line(aes(x = time, y = I), colour= 'blue', data = opt_dat)
plott = plott + labs(x = "Time (days)", y = "Number of infected people", title = 'Number of infected people over time')
plott

# calculating R0 value
R0 = optimised$par['beta']/optimised$par['gamma']
print(paste("The R0 value:", R0))
[1] "The R0 value: 1.09664087944329"

Observations from the graph and R0 value

  • We can see that our SIR model peaks to 4000 infections after about 25 days.
  • From the graph, it seems the disease is bought to a low with 50 to 60 days.
  • The R0 value (basic reproduction number) is just over 1, which means that every infected person infects about 1.09 people on average.
  • As this is a closed world, without taking population change and other factors, we see that the disease dies down by 60 days. Those factors do have an impact in our model, if we include them.
LS0tDQp0aXRsZTogIlNpbXBsZSBTSVIgTW9kZWwiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIyBEZXNjcmlwdGlvbjoNCg0KVGhpcyBpcyBhIHNpbXBsZSBTSVIgbW9kZWwsIGJ1aWx0IHVzaW5nIENPVklELTE5IGRhdGEgZXh0cmFjdGVkIGZyb20gV2lraXBlZGlhIChbbGluayB0byBzb3VyY2VdKGh0dHBzOi8vZW4ud2lraXBlZGlhLm9yZy93L2luZGV4LnBocD90aXRsZT1UZW1wbGF0ZToyMDE5JUUyJTgwJTkzMjBfV3VoYW5fY29yb25hdmlydXNfZGF0YS9DaGluYV9tZWRpY2FsX2Nhc2VzX2J5X3Byb3ZpbmNlJm9sZGlkPTk0MTIzNTY2MikpDQoNCmBgYHtyfQ0KIyBMb2FkaW5nIHJlcXVpcmVkIGxpYnJhcmllcyAoc2lsZW50bHkpDQoNCnN1cHByZXNzUGFja2FnZVN0YXJ0dXBNZXNzYWdlcyh7DQogIGxpYnJhcnkoZ2dwbG90MikNCiAgbGlicmFyeShtYWdyaXR0cikNCiAgbGlicmFyeShydmVzdCkNCiAgbGlicmFyeSh4bWwyKQ0KICBsaWJyYXJ5KGRwbHlyKQ0KICBsaWJyYXJ5KGRlU29sdmUpDQogIGxpYnJhcnkocGxvdGx5KQ0KfSkNCmBgYA0KDQojIyMgTG9hZGluZyB0aGUgZGF0YXNldA0KDQpgYGB7cn0NCmRhdCA9IHJlYWQuY3N2KCJ3aWtpX2RhdGEuY3N2IikNCmRhdA0KYGBgDQpgYGB7cn0NCiMgcmV0cmlldmluZyBkYXRhIHJlbGF0ZWQgdG8gaW5mZWN0aW9ucywgcmVjb3ZlcmllcyBhbmQgZGVhdGhzDQoNCmluZmVjdGVkID0gZGF0ICU+JSBzZWxlY3QoIWNvbnRhaW5zKGMoJ19SJywgJ19EJykpKQ0KcmVjb3ZlcmVkID0gZGF0ICU+JSBzZWxlY3QoY29udGFpbnMoYygnZGF0ZScsICdfUicpKSkNCmRlYWQgPSBkYXQgJT4lIHNlbGVjdChjb250YWlucyhjKCdkYXRlJywgJ19EJykpKQ0KDQpgYGANCg0KIyMjIERhdGEgQW5hbHlzaXMNCg0KYGBge3J9DQojIGN1bXVsYXRpdmUgaW5mZWN0aW9ucyBvdmVyIHRpbWUNCmluZl9wbG90X2RhdCA9IGluZmVjdGVkDQpkaXN0cmljdHMgPSBjb2xuYW1lcyhpbmZfcGxvdF9kYXQpWy0xXQ0KDQppbmZfcGxvdF9kYXQgPSBpbmZfcGxvdF9kYXQgJT4lIG11dGF0ZV9hdChkaXN0cmljdHMsIGN1bXN1bSkNCg0KaW5mX3Bsb3QgPSBwbG90X2x5KCkNCmZvcihjb2wgaW4gZGlzdHJpY3RzKXsNCiAgaW5mX3Bsb3QgPSBpbmZfcGxvdCAlPiUgYWRkX3RyYWNlKHggPSBpbmZfcGxvdF9kYXRbWydkYXRlJ11dLCB5ID0gaW5mX3Bsb3RfZGF0W1tjb2xdXSwgbmFtZSA9IGNvbCwgdHlwZSA9ICdzY2F0dGVyJywgbW9kZSA9ICdsaW5lcycpDQp9DQppbmZfcGxvdCA9IGluZl9wbG90ICU+JSBsYXlvdXQodGl0bGUgPSAnQ3VtdWxhdGl2ZSBpbmZlY3Rpb25zIG92ZXIgdGltZSAoZGF5cyknLCB4YXhpcyA9IGxpc3QodGl0bGUgPSAnRGF5cycpLCB5YXhpcyA9IGxpc3QodGl0bGUgPSAnTnVtYmVyIG9mIHBlb3BsZScpKQ0KaW5mX3Bsb3QNCmBgYA0KYGBge3J9DQojIGN1bXVsYXRpdmUgcmVjb3ZlcmllcyBvdmVyIHRpbWUNCnJlY19wbG90X2RhdCA9IHJlY292ZXJlZA0KZGlzdHJpY3RzID0gY29sbmFtZXMocmVjX3Bsb3RfZGF0KVstMV0NCg0KcmVjX3Bsb3RfZGF0ID0gcmVjX3Bsb3RfZGF0ICU+JSBtdXRhdGVfYXQoZGlzdHJpY3RzLCBjdW1zdW0pDQoNCnJlY19wbG90ID0gcGxvdF9seSgpDQpmb3IoY29sIGluIGRpc3RyaWN0cyl7DQogIHJlY19wbG90ID0gcmVjX3Bsb3QgJT4lIGFkZF90cmFjZSh4ID0gcmVjX3Bsb3RfZGF0W1snZGF0ZSddXSwgeSA9IHJlY19wbG90X2RhdFtbY29sXV0sIG5hbWUgPSBjb2wsIHR5cGUgPSAnc2NhdHRlcicsIG1vZGUgPSAnbGluZXMnKQ0KfQ0KcmVjX3Bsb3QgPSByZWNfcGxvdCAlPiUgbGF5b3V0KHRpdGxlID0gJ0N1bXVsYXRpdmUgcmVjb3ZlcmllcyBvdmVyIHRpbWUgKGRheXMpJywgeGF4aXMgPSBsaXN0KHRpdGxlID0gJ0RheXMnKSwgeWF4aXMgPSBsaXN0KHRpdGxlID0gJ051bWJlciBvZiBwZW9wbGUnKSkNCnJlY19wbG90DQpgYGANCg0KYGBge3J9DQojIGN1bXVsYXRpdmUgZGVhdGhzIG92ZXIgdGltZQ0KZGVhZF9wbG90X2RhdCA9IGRlYWQNCmRpc3RyaWN0cyA9IGNvbG5hbWVzKGRlYWRfcGxvdF9kYXQpWy0xXQ0KDQpkZWFkX3Bsb3RfZGF0ID0gZGVhZF9wbG90X2RhdCAlPiUgbXV0YXRlX2F0KGRpc3RyaWN0cywgY3Vtc3VtKQ0KDQpkZWFkX3Bsb3QgPSBwbG90X2x5KCkNCmZvcihjb2wgaW4gZGlzdHJpY3RzKXsNCiAgZGVhZF9wbG90ID0gZGVhZF9wbG90ICU+JSBhZGRfdHJhY2UoeCA9IGRlYWRfcGxvdF9kYXRbWydkYXRlJ11dLCB5ID0gZGVhZF9wbG90X2RhdFtbY29sXV0sIG5hbWUgPSBjb2wsIHR5cGUgPSAnc2NhdHRlcicsIG1vZGUgPSAnbGluZXMnKQ0KfQ0KZGVhZF9wbG90ID0gZGVhZF9wbG90ICU+JSBsYXlvdXQodGl0bGUgPSAnQ3VtdWxhdGl2ZSBkZWF0aHMgb3ZlciB0aW1lIChkYXlzKScsIHhheGlzID0gbGlzdCh0aXRsZSA9ICdEYXlzJyksIHlheGlzID0gbGlzdCh0aXRsZSA9ICdOdW1iZXIgb2YgcGVvcGxlJykpDQpkZWFkX3Bsb3QNCmBgYA0KDQojIyMjIE9ic2VydmF0aW9ucyBmcm9tIHRoZSB0aHJlZSBncmFwaHM6DQogIC0gVGhlIGRpc3RyaWN0IG9mIEh1YmVpIGlzIHRoZSBtb3N0IGFmZmVjdGVkIGRpc3RyaWN0IGFtb25nIHRoZSBvdGhlcnMsIHdpdGggb3ZlciAzN2sgY29uZmlybWVkIGNhc2VzIGFuZCAxMDAwKyBkZWF0aHMsIHdpdGggdGhlIGNpdHkgb2YgV3VoYW4gY29udHJpYnV0aW5nIHRvIG1vc3QgY2FzZXMuDQogIC0gVGhlIG51bWJlciBvZiBjYXNlcywgcmVjb3ZlcmllcyBhbmQgZGVhdGhzIGluIHRoZSBvdGhlciBkaXN0cmljdHMgaXMgc2lnbmlmaWNhbnRseSBsb3dlciB0aGFuIG9ic2VydmVkIGluIEh1YmVpLiANCg0KDQoNCiMjIyBTSVIgTW9kZWwNCg0KYGBge3J9DQojZ2V0dGluZyBkYXRhIGZvciBmaXR0aW5nIFNJUiBtb2RlbA0KDQpzaXJfZGF0YSA9IGRhdGEuZnJhbWUodGltZSA9IDE6MzUpDQpzaXJfZGF0YSRJID0gcm93U3VtcyhpbmZlY3RlZFssIC1jKDEsIDMzOjM0KV0pDQpzaXJfZGF0YSRSID0gcm93U3VtcyhyZWNvdmVyZWRbLCAtMV0pDQoNCnNpcl9kYXRhDQpgYGANCg0KYGBge3J9DQojcGxvdHRpbmcgdGhlIGluZmVjdGVkIHBvcHVsYXRpb24NCg0KZ2dwbG90KGRhdGEgPSBzaXJfZGF0YSkgKw0KICAgIGdlb21fcG9pbnQoYWVzKHggPSB0aW1lLCB5ID0gSSkpICsNCiAgICBsYWJzKHggPSAiVGltZSAoZGF5cykiLCB5ID0gIk51bWJlciBvZiBpbmZlY3RlZCBwZW9wbGUiLCB0aXRsZSA9ICdOdW1iZXIgb2YgaW5mZWN0ZWQgcGVvcGxlIG92ZXIgdGltZScpICsNCiAgICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCB2anVzdCA9IDAuNSwgaGp1c3Q9MSkpDQpgYGANCg0KDQpgYGB7cn0NCiMgZGVmaW5pbmcgU0lSIG1vZGVsDQptb2RlbF9TSVIgPC1mdW5jdGlvbih0aW1lLCBzdGF0ZSwgcGFyYW1ldGVycyl7DQogIHdpdGgoYXMubGlzdChjKHN0YXRlLCBwYXJhbWV0ZXJzKSksIHsNCiAgICANCiAgICBOID0gUytJK1INCiAgICBsYW1iZGEgPSBiZXRhICogKEkvTikNCiAgICANCiAgICBkUyA9IC1sYW1iZGEqUw0KICAgIGRJID0gbGFtYmRhKlMgLSBnYW1tYSpJDQogICAgZFIgPSBnYW1tYSpJDQogICAgDQogICAgcmV0dXJuKGxpc3QoYyhkUywgZEksIGRSKSkpDQogIH0pDQp9DQpgYGANCg0KYGBge3J9DQojIGRlZmluaW5nIHN1bS1vZi1zcXVhcmVzIGVycm9yIGZ1bmN0aW9uDQpTSVJfU1NFIDwtIGZ1bmN0aW9uKHBhcmFtcywgZGF0YSl7DQogIHJlc3VsdCA9IGFzLmRhdGEuZnJhbWUob2RlKHkgPSBpbml0aWFsX3N0YXRlX3ZhbHVlcywNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGltZXMgPSB0aW1lcywNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZnVuYyA9IG1vZGVsX1NJUiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcGFybXMgPSBwYXJhbXMpKQ0KICANCiAgZGVsdGEgPSAocmVzdWx0JElbcmVzdWx0JHRpbWUgJWluJSBkYXRhJHRpbWVdIC0gZGF0YSRJKV4yDQogIFNTRSA9IHN1bShkZWx0YSkNCiAgDQogIHJldHVybihTU0UpDQp9DQpgYGANCg0KYGBge3J9DQojIGZpbmRpbmcgb3B0aW11bSBiZXRhIGFuZCBnYW1tYSB2YWx1ZXMgZm9yIHRoZSB0ZXN0IHBvcHVsYXRpb24NCg0KaW5pdGlhbF9zdGF0ZV92YWx1ZXMgPSBjKFMgPSAxMDAwMDAwLTEsIEkgPSAxLCBSID0gMCkNCg0KdGltZXMgPSBzZXEoZnJvbSA9IDAsIHRvID0gNjANCiAgICAgICAgICAgICwgYnkgPSAwLjEpDQoNCm9wdGltaXNlZCA9IG9wdGltKHBhciA9IGMoYmV0YSA9IDEsIGdhbW1hID0gMC41KSwNCiAgICAgICAgICAgICAgICAgIGZuID0gU0lSX1NTRSwNCiAgICAgICAgICAgICAgICAgIGRhdCA9IHNpcl9kYXRhKQ0KDQpvcHRpbWlzZWQkcGFyDQpgYGANCg0KYGBge3J9DQojcGxvdHRpbmcgcmVhbCB2cy4gU0lSIG1vZGVsIGRhdGENCm9wdF9kYXQgPSBhcy5kYXRhLmZyYW1lKG9kZSh5ID0gaW5pdGlhbF9zdGF0ZV92YWx1ZXMsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIHRpbWVzID0gdGltZXMsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZ1bmMgPSBtb2RlbF9TSVIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBhcm1zID0gb3B0aW1pc2VkJHBhcikpDQoNCnBsb3R0ID0gZ2dwbG90KCkgDQpwbG90dCA9IHBsb3R0ICsgZ2VvbV9wb2ludChhZXMoeCA9IHRpbWUsIHkgPSBJKSxjb2xvdXIgPSAncmVkJywgZGF0YSA9IHNpcl9kYXRhKQ0KcGxvdHQgPSBwbG90dCArIGdlb21fbGluZShhZXMoeCA9IHRpbWUsIHkgPSBJKSwgY29sb3VyPSAnYmx1ZScsIGRhdGEgPSBvcHRfZGF0KQ0KcGxvdHQgPSBwbG90dCArIGxhYnMoeCA9ICJUaW1lIChkYXlzKSIsIHkgPSAiTnVtYmVyIG9mIGluZmVjdGVkIHBlb3BsZSIsIHRpdGxlID0gJ051bWJlciBvZiBpbmZlY3RlZCBwZW9wbGUgb3ZlciB0aW1lJykNCnBsb3R0DQpgYGANCg0KYGBge3J9DQojIGNhbGN1bGF0aW5nIFIwIHZhbHVlDQpSMCA9IG9wdGltaXNlZCRwYXJbJ2JldGEnXS9vcHRpbWlzZWQkcGFyWydnYW1tYSddDQpwcmludChwYXN0ZSgiVGhlIFIwIHZhbHVlOiIsIFIwKSkNCmBgYA0KIyMjIyBPYnNlcnZhdGlvbnMgZnJvbSB0aGUgZ3JhcGggYW5kIFIwIHZhbHVlDQogIC0gV2UgY2FuIHNlZSB0aGF0IG91ciBTSVIgbW9kZWwgcGVha3MgdG8gNDAwMCBpbmZlY3Rpb25zIGFmdGVyIGFib3V0IDI1IGRheXMuDQogIC0gRnJvbSB0aGUgZ3JhcGgsIGl0IHNlZW1zIHRoZSBkaXNlYXNlIGlzIGJvdWdodCB0byBhIGxvdyB3aXRoIDUwIHRvIDYwIGRheXMuDQogIC0gVGhlIFIwIHZhbHVlIChiYXNpYyByZXByb2R1Y3Rpb24gbnVtYmVyKSBpcyBqdXN0IG92ZXIgMSwgd2hpY2ggbWVhbnMgdGhhdCBldmVyeSBpbmZlY3RlZCBwZXJzb24gaW5mZWN0cyBhYm91dCAxLjA5IHBlb3BsZSBvbiBhdmVyYWdlLiANCiAgLSBBcyB0aGlzIGlzIGEgY2xvc2VkIHdvcmxkLCB3aXRob3V0IHRha2luZyBwb3B1bGF0aW9uIGNoYW5nZSBhbmQgb3RoZXIgZmFjdG9ycywgd2Ugc2VlIHRoYXQgdGhlIGRpc2Vhc2UgZGllcyBkb3duIGJ5IDYwIGRheXMuIFRob3NlIGZhY3RvcnMgZG8gaGF2ZSBhbiBpbXBhY3QgaW4gb3VyIG1vZGVsLCBpZiB3ZSBpbmNsdWRlIHRoZW0uDQogIA0KICANCiAgDQogIA0KICANCiAgDQogIA0KICANCiAgDQogIA0K